Représentation sous forme de listes pour les deux réseaux sociaux :
# A network is encoded as [n, links]: n nodes numbered 0..n-1 and a list
# of [i, j] friendship links.
reseauA = [5,[ [0,2],[0,1],[0,3],[1,2],[2,3] ]]
reseauB = [5, [ [0,1],[1,2],[2,4],[4,3],[3,1] ]]
def creerReseauVide(n):
    """Create a social network with n nodes (0..n-1) and no friendship links."""
    liens = []
    return [n, liens]
# Demo: an empty network with 5 nodes.
r = creerReseauVide(5)
def estUnLienEntre(paire, i, j):
    """Return True when both nodes i and j appear in the pair `paire`."""
    return all(noeud in paire for noeud in (i, j))
# Demo: the pair [1, 0] does link nodes 0 and 1.
estUnLienEntre([1,0],0,1)
def sontAmis(reseau, i, j):
    """Return True when the link i-j (in either orientation) is in `reseau`."""
    liens = reseau[1]
    return [i, j] in liens or [j, i] in liens
# Demo: 3 and 4 are friends in reseauB (link [4, 3]).
sontAmis(reseauB,3,4)
Soit m la taille de reseau[1]
La complexité de cette fonction : 2m + 1 = O(m)
def declareAmis(reseau, i, j):
    """Record the friendship i-j in `reseau`, unless it already exists."""
    deja_amis = sontAmis(reseau, i, j)
    if not deja_amis:
        reseau[1].append([i, j])
# Demo: 3 and 5 are not friends yet, declare them, then check again.
sontAmis(reseauB,3,5)
declareAmis(reseauB,3,5)
sontAmis(reseauB,3,5)
Soit m la taille de reseau[1]
if (not sontAmis(reseau,i,j) ): 2m+1
reseau[1].append([i,j]) 2
La complexité de cette fonction : 2m + 3 = O(m)
def listeDesAmisDe(reseau, i):
    """Return the list of nodes linked to node i, in link-list order."""
    return [b if a == i else a for a, b in reseau[1] if i in (a, b)]
# Demo: friends of node 1 in reseauB.
listeDesAmisDe(reseauB,1)
Soit m la taille de reseau[1]
for r in reseau[1]: 1
if (i == r[0]): 2m
l.append(r[1])
elif (i == r[1]): 2m
l.append(r[0]) 2m
return l 1
La complexité de cette fonction : 6m + 2 = O(m)
parentA : [6,1,1,3,4,5,1,5,5,7]
parentB : [3,9,0,3,9,4,4,7,1,9]
def creerPartitionEnSingletons(n):
    """Return the parent array where each node 0..n-1 is its own representative."""
    return list(range(n))
# Demo: partition of 5 singletons -> [0, 1, 2, 3, 4].
creerPartitionEnSingletons(5)
def representant(parent, i):
    """Return the root (representative) of i's group in the parent array.

    Returns the string "error" when i is not a valid index of `parent`.
    """
    try:
        parent[i]
    except IndexError:
        return "error"
    x = i
    # Climb parent links until reaching a fixed point (the root).
    while parent[x] != x:
        x = parent[x]
    return x
# Demo parent array; representative of 14: 14 -> 11 -> 1 -> 9 -> 9.
parent = [6,9,3,3,3,5,5,5,1,9,10,1,4,9,11,9]
representant(parent,14)
Le pire des cas est quand i est le dernier élément du groupe et que le tableau contient un seul groupe.
try:
parent[i] 1
except IndexError:
return "error"
x = i 1
while (True):
if parent[x] != x: 2n-1
x = parent[x] 2n-1
else:
return x 1
La complexité de cette fonction est 4n + 1 = O(n)
def fusion(parent, i, j):
    """Merge the groups of i and j by re-parenting i's root under j's root."""
    racine_i = representant(parent, i)
    racine_j = representant(parent, j)
    parent[racine_i] = racine_j
# Demo: merge the groups of 6 and 1, then look up 6's new representative.
fusion(parent,6,1)
representant(parent,6)
def representant_v2(parent, i):
    """Return the root of i's group, compressing the path along the way.

    Every node visited is re-parented directly to the root, so later
    lookups are faster. Returns "error" when i is out of range.
    """
    try:
        parent[i]
    except IndexError:
        return "error"
    chemin = []
    x = i
    while parent[x] != x:
        chemin.append(x)
        x = parent[x]
    # Path compression: attach every visited node straight to the root.
    for noeud in chemin:
        parent[noeud] = x
    return x
# Demo: representant_v2 compresses the path, so the second lookup is direct.
representant_v2(parent,0)
representant(parent,0)
def listeDesGroupes(parent):
    """Return the groups encoded by `parent` as a list of lists of node indices.

    Works on a copy, so the caller's parent array is left untouched.
    """
    p = parent.copy()
    # Replace every entry by its root so all nodes of one group share one value.
    # Bug fix: the original called representant(p, i) for its value only; that
    # call has no side effect on p, so the grouping silently used direct
    # parents instead of roots.
    for i in range(len(p)):
        p[i] = representant(p, i)
    unique = list(set(p))
    l = []
    for u in unique:
        # All indices whose root is u form one group.
        z = [i for i in range(len(parent)) if p[i] == u]
        l.append(z)
    return l
# Demo: show the parent array and list its groups (twice).
parent
listeDesGroupes(parent)
listeDesGroupes(parent)
import random
def coupeMinimumRandomisee(reseau):
    """Karger-style randomized cut: contract the endpoints of randomly
    drawn links of `reseau` until only two groups remain, then return the
    partition (parent array).

    NOTE(review): mutates the order of reseau[1] (drawn links are rotated
    to the back of the list) -- confirm callers do not rely on link order.
    """
    P = creerPartitionEnSingletons(reseau[0])
    c = 1
    # Keep contracting while 3+ groups remain and links are left to draw.
    while (len(listeDesGroupes(P)) >= 3 and (len(reseau[1]) != c)):
        # Draw among the first len-c links; drawn ones sit at the back.
        i = random.randint(0,len(reseau[1])-c)
        l = reseau[1][i]
        if( representant_v2(i=l[0],parent=P) != representant_v2(i=l[1],parent=P) ):
            fusion(P,l[0],l[1])
        # Rotate the drawn link to the end so it is not drawn again.
        reseau[1].remove(l)
        reseau[1].append(l)
        c = c +1
    # Fallback: if more than 2 groups are still left, keep merging.
    # NOTE(review): this rebinds c as a node index and fusion(P,i,j) reuses
    # the leftover i from the loop above -- confirm this is intended.
    for c in range(len(reseau[1])):
        j = 0
        while (len(listeDesGroupes(P)) >= 3 ):
            if( representant_v2(i=c,parent=P) != representant_v2(i=j,parent=P) ):
                fusion(P,i,j)
            j = j+1
    return P
# Run the randomized cut on reseauA and list the two resulting groups.
lg = listeDesGroupes(coupeMinimumRandomisee(reseauA))
lg
def tailleCoupe(reseau, parent):
    """Count cross-side pairs (i, j) of a randomized cut of `reseau` whose
    representatives coincide in `parent`, and return that count.

    NOTE(review): this recomputes a fresh randomized cut of `reseau`
    instead of reading the split from `parent` -- confirm that is intended.
    """
    c = 0
    lg = listeDesGroupes(coupeMinimumRandomisee(reseau))
    for i in lg[0]:
        for j in lg[1]:
            if( representant_v2(i,parent) == representant_v2(j,parent) ):
                c = c+1
    # Bug fix: the original computed c but fell off the end of the function,
    # so every call returned None.
    return c
Les données ont été extraites dans le fichier "twitter_data.ipynb"
Première data sur #JamalKashoggi de 2018-10-17 à 2018-10-19 :
à peu près 18 000 lignes de tweet/retweet
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
from networkx.algorithms import community
import numpy as np
from geotext import GeoText
# Load the first dataset, drop duplicate tweets, keep only retweets, and
# score each text's sentiment with TextBlob.
data = pd.read_csv("JamalKashoggi2.csv")
data = data.drop_duplicates("tweet_id")
data.shape
data_s = data[data["retweeted_status"] != 0]
data_s = data_s.sort_values("created_at",ascending=True)
# TextBlob sentiment: polarity in [-1, 1], subjectivity in [0, 1].
data_s["polarity"] = [TextBlob(x).sentiment.polarity for x in data_s["text"]]
data_s["subjectivity"] = [TextBlob(x).sentiment.subjectivity for x in data_s["text"]]
# Disabled one-off export used to build a Gephi spreadsheet.
"""data_s3 = data_s.copy()
data_s3 = data_s3.sort_values(by="created_at")
data_s3["order"] = [str(i)+",18000" for i in range(2000,2000+len(data_s3))]
data_s3["retweeted_status"] = data_s3["retweeted_status"].astype(str)
data_s3["tweet_id"] = data_s3["tweet_id"].astype(str)
data_s3.to_excel("JamalKashoggi2_forgephi.xlsx",index=False)"""
data_s.iloc[2:].head()
# Build a directed retweet graph from the first 8000 rows, with one node
# colour per row based on its sentiment score.
h = nx.DiGraph()
l = []
color_map = []
for d in data_s.iloc[:8000].values:
    # d[10] is presumably the polarity column added above -- confirm.
    if (d[10] < -0.1):
        color_map.append("#ff4c4c")
    elif (d[10] > 0.1):
        color_map.append("#659D32")
    else:
        color_map.append("#99CCFF")
    # Edge (d[7], d[0]); column meanings not visible here -- verify against
    # the CSV header.
    l.append((d[7],d[0]))
len(l)
# 'node_size': 10,
# 'node_color': "#99CCFF",
h.add_edges_from(l)
#pos=nx.spring_layout(h)
# Shared drawing options for the graph plots.
options = {
    'line_color': 'grey',
    'linewidths': 0,
    'width': 0.1,
}
plt.figure(figsize=(17,17))
# Kamada-Kawai layout; node size grows with degree (+5 keeps low-degree
# nodes visible).
nx.draw_kamada_kawai(h,**options,node_color=color_map,
                     node_size=[v[1]+5 for v in nx.degree(h)])
#nx.draw_spring(h,**options)
#plt.show()
plt.savefig("graph_f1.png", dpi=1000)
plt.savefig("graph_f1.pdf")
print(nx.info(h))
nx.density(h)
# Sentiment distribution bar chart (neutral / negative / positive counts).
plt.bar(["Neutre","Negatif","Positif"],[color_map.count("#99CCFF"),color_map.count("#ff4c4c"),color_map.count("#659D32")])
plt.savefig("plot_f1.png")
# Betweenness centrality per node, joined back to usernames.
bc = nx.betweenness_centrality(h)
key = [key for key, value in bc.items() ]
value = [value for key, value in bc.items() ]
bc_data = pd.DataFrame({"key":key,"value":value})
# NOTE(review): bare attribute access, no call -- probably leftover.
data_s.join
bc_data.join(data_s, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].iloc[6:10]
# In/out degree of the retweet graph.
in_d = h.in_degree()
out_d = h.out_degree()
[[key,value] for key, value in out_d ][:5]
key = [key for key, value in out_d ]
value = [value for key, value in out_d ]
out_data = pd.DataFrame({"key":key,"value":value})
out_data.join(data_s, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].sort_values(by="value",ascending=False).iloc[3:6]
#nx.diameter(h) Found infinite path length because the digraph is not strongly connected
nx.algorithms.reciprocity(h)
# Closeness centrality: top values, then joined with usernames.
cc = nx.closeness_centrality(h)
key = [key for key, value in cc.items() ]
value = [value for key, value in cc.items() ]
cc_data = pd.DataFrame({"key":key,"value":value})
cc_data.sort_values(by="value",ascending=False).head(20)
cc_data.join(data_s, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].iloc[6:10]
# Detect a country code in each free-text user location with GeoText.
data_s["userlocation"] = data_s["userlocation"].fillna(" ")
places = []
for i in data_s.values:
    # i[4] is presumably the userlocation column -- confirm against the CSV.
    places.append(GeoText(i[4]))
pl = []
for i in places:
    pl.append(list(i.country_mentions.items()))
# Keep the first mentioned country code; NaN when none was found.
for i in range(len(pl)):
    if(pl[i] != []):
        pl[i] = pl[i][0][0]
    else:
        pl[i] = np.nan
data_s["location"] = pl
data_s.shape
data_s.shape
lat = []
lng = []
lat_lng=[]
countries_names=[]
countries = pd.read_csv("countries.csv")
countries.head()
i=0
#data_s = data_s.iloc[:,:]
data_s = data_s.drop(data_s.index[13558])
for d in data_s.values:
if (d[-1] is not np.nan):
lat.append(float(countries[countries["country"] == d[-1]]["latitude"]))
lng.append(float(countries[countries["country"] == d[-1]]["longitude"]))
lat_lng.append((float(countries[countries["country"] == d[-1]]["latitude"]),float(countries[countries["country"] == d[-1]]["longitude"])))
countries_names.append(countries[countries["country"] == d[-1]]["name"].iloc[0])
else :
lat.append(0)
lng.append(0)
countries_names.append(0)
lat_lng.append((0,0))
i=i+1
data_s["lat"] = lat
data_s["lng"] = lng
data_s["lat_lng"]=lat_lng
data_s["country_name"] = countries_names
data_s.shape
from itertools import chain
def draw_map(m, scale=0.2):
    """Render a shaded-relief background on Basemap `m` plus a faint white
    latitude/longitude grid (13 parallels and 13 meridians)."""
    m.shadedrelief(scale=scale)
    # drawparallels/drawmeridians return dicts mapping each coordinate to
    # ([Line2D, ...], labels); collect every grid line they created.
    parallels = m.drawparallels(np.linspace(-90, 90, 13))
    meridians = m.drawmeridians(np.linspace(-180, 180, 13))
    grid_lines = [line
                  for entry in list(parallels.values()) + list(meridians.values())
                  for line in entry[0]]
    # Restyle each grid line: thin solid white, mostly transparent.
    for line in grid_lines:
        line.set(linestyle='-', alpha=0.3, color='w')
# Keep only rows with a resolved country position.
data_s4 = data_s[data_s["lat_lng"]!=(0,0)]
data_s4.shape
from mpl_toolkits.basemap import Basemap
import random
# Jitter each coordinate by up to +-5 degrees so points sharing a country
# centroid do not overlap exactly on the map.
data_s4.iloc[:,-3]= [x+random.uniform(-5,5) for x in data_s4.iloc[:,-3]]
data_s4.iloc[:,-4] = [x+random.uniform(-5,5) for x in data_s4.iloc[:,-4]]
data_s4.columns
# Colour points by sentiment (d[10], presumably polarity -- confirm).
color_map4 = []
for d in data_s4.values:
    if (d[10] < -0.1):
        color_map4.append("#ff4c4c")
    elif (d[10] > 0.1):
        color_map4.append("#00cc00")
    else:
        color_map4.append("#ffffff")
    #h2.add_node(d[7],attr_dict={'start' :d[0], 'end' : "2019-01-01 23:37:53"})
    #h2.add_node(d[0])
fig = plt.figure(figsize=(25,25))
m = Basemap(projection='cyl')
draw_map(m)
# scatter(x, y): column -3 (lng) on x, column -4 (lat) on y.
m.scatter(data_s4.iloc[:,-3],data_s4.iloc[:,-4],color=color_map4)
plt.savefig("worldmap_f1.pdf")
Deuxième jeu de données sur #JamalKashoggi, du 2018-10-22 (un jour avant le discours d'Erdogan) au 2018-10-27 :
à peu près 3000 lignes de tweet/retweet
# Second dataset: same pipeline (dedupe, keep retweets, sentiment scores).
data2 = pd.read_csv("JamalKashoggi3.csv")
data2 = data2.drop_duplicates("tweet_id")
data_s2 = data2[data2["retweeted_status"] != 0]
data_s2 = data_s2.sort_values("created_at",ascending=True)
data_s2["polarity"] = [TextBlob(x).sentiment.polarity for x in data_s2["text"]]
data_s2["subjectivity"] = [TextBlob(x).sentiment.subjectivity for x in data_s2["text"]]
data_s2.shape
data_s2.tail()
# Build the second retweet graph, with a colour and a textual sentiment
# label per row.
h2 = nx.DiGraph()
l2 = []
color_map2 = []
stat = []
for d in data_s2.values:
    # d[10] is presumably the polarity column -- confirm.
    if (d[10] < -0.1):
        color_map2.append("#ff4c4c")
        stat.append("negative")
    elif (d[10] > 0.1):
        color_map2.append("#659D32")
        stat.append("positive")
    else:
        color_map2.append("#99CCFF")
        stat.append("neutral")
    #h2.add_node(d[7],attr_dict={'start' :d[0], 'end' : "2019-01-01 23:37:53"})
    #h2.add_node(d[0])
    # Edge (d[4], d[6]); note this uses different columns than the first
    # dataset's graph -- verify against the CSV header.
    l2.append((d[4],d[6]))
h2.add_edges_from(l2)
#pos=nx.spring_layout(h)
options2 = {
    'line_color': 'grey',
    'linewidths': 0,
    'width': 0.1,
}
data_s2["stat"] = stat
plt.figure(figsize=(17,17))
# Default layout; node size scales with degree.
nx.draw(h2,**options2,node_color=color_map2,node_size=[v[1]*10 for v in nx.degree(h2)])
#nx.draw_spring(h,**options)
#plt.show()
plt.savefig("graph_f3.png", dpi=1000)
plt.savefig("graph_f3.pdf")
print(nx.info(h2))
nx.density(h2)
# Sentiment distribution bar chart for dataset 2.
plt.bar(["Neutre","Negatif","Positif"],[color_map2.count("#99CCFF"),color_map2.count("#ff4c4c"),color_map2.count("#659D32")])
plt.savefig("plot_f2.png")
# Same centrality analyses as for the first graph, on h2.
bc = nx.betweenness_centrality(h2)
#[[key,value] for key, value in bc.items() ][:5]
key = [key for key, value in bc.items() ]
value = [value for key, value in bc.items() ]
bc_data = pd.DataFrame({"key":key,"value":value})
bc_data.join(data_s2, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].iloc[1:3]
in_d = h2.in_degree()
out_d = h2.out_degree()
[[key,value] for key, value in out_d ][:5]
key = [key for key, value in out_d ]
value = [value for key, value in out_d ]
out_data = pd.DataFrame({"key":key,"value":value})
out_data.join(data_s2, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].sort_values(by="value",ascending=False).iloc[3:6]
#nx.diameter(h2) Found infinite path length because the digraph is not strongly connected
#nx.draw(nx.stochastic_graph(h2))
nx.algorithms.reciprocity(h2)
cc = nx.closeness_centrality(h2)
key = [key for key, value in cc.items() ]
value = [value for key, value in cc.items() ]
cc_data = pd.DataFrame({"key":key,"value":value})
cc_data.join(data_s2, lsuffix='key', rsuffix='tweet_id')[["key","username","value"]].iloc[1:3]
# First Girvan-Newman community split and its number of communities.
communities = community.girvan_newman(h2)
coms = next(communities)
len(coms)
#nx.spring_layout(h2,k=.12)
# Disabled one-off export used to build a Gephi spreadsheet for dataset 2.
"""data_s2 = data_s2.sort_values(by="created_at")
data_s2["order"] = [str(i)+",5500" for i in range(2000,2000+len(data_s2))]
data_s2["retweeted_status"] = data_s2["retweeted_status"].astype(str)
data_s2["tweet_id"] = data_s2["tweet_id"].astype(str)
data_s2.to_excel("JamalKashoggi3_forgephi.xlsx",index=False)"""
#nx.write_gexf(h2, "test.gexf")
# Reload dataset 2 from scratch and redo the sentiment scoring, then
# prepare user locations for geocoding.
data2 = pd.read_csv("JamalKashoggi3.csv")
data2 = data2.drop_duplicates("tweet_id")
data_s2 = data2[data2["retweeted_status"] != 0]
data_s2 = data_s2.sort_values("created_at",ascending=True)
data_s2["polarity"] = [TextBlob(x).sentiment.polarity for x in data_s2["text"]]
data_s2["subjectivity"] = [TextBlob(x).sentiment.subjectivity for x in data_s2["text"]]
data_s2["userlocation"] = data_s2["userlocation"].fillna(" ")
data_s2[["username","userlocation"]].head(10)
data_s2.columns
# Country detection for dataset 2 (column index 8 here, unlike dataset 1
# which used index 4 -- verify both against the CSV headers).
places = []
for i in data_s2.values:
    places.append(GeoText(i[8]))
pl = []
for i in places:
    pl.append(list(i.country_mentions.items()))
# Keep the first mentioned country code; NaN when none was found.
for i in range(len(pl)):
    if(pl[i] != []):
        pl[i] = pl[i][0][0]
    else:
        pl[i] = np.nan
data_s2["location"] = pl
data_s2[["username","userlocation","location"]].iloc[20:30]
# Top-10 most frequent detected countries.
data_s2.groupby("location").count().sort_values(by="created_at",ascending=False).head(n=10)
#data_s2.groupby("location")["polarity"].mean()
data_s2.columns
# Geocode dataset 2: country code -> latitude/longitude via countries.csv;
# rows without a country get (0, 0) placeholders.
lat = []
lng = []
lat_lng=[]
countries_names=[]
countries = pd.read_csv("countries.csv")
countries.head()
for d in data_s2.values:
    # d[12] is presumably the "location" column -- confirm its index.
    if (d[12] is not np.nan):
        lat.append(float(countries[countries["country"] == d[12]]["latitude"]))
        lng.append(float(countries[countries["country"] == d[12]]["longitude"]))
        lat_lng.append((float(countries[countries["country"] == d[12]]["latitude"]),float(countries[countries["country"] == d[12]]["longitude"])))
        countries_names.append(countries[countries["country"] == d[12]]["name"].iloc[0])
    else :
        lat.append(0)
        lng.append(0)
        countries_names.append(0)
        lat_lng.append((0,0))
data_s2["lat"] = lat
data_s2["lng"] = lng
data_s2["lat_lng"]=lat_lng
data_s2["country_name"] = countries_names
# Keep only rows with a resolved position.
data_s3 = data_s2[data_s2["lat_lng"]!=(0,0)]
data_s3.shape
data_s3[["username","location","lat","lng"]].iloc[20:30]
from itertools import chain
def draw_map(m, scale=0.2):
    """Draw a shaded-relief background plus a faint white lat/lon grid on
    Basemap `m`."""
    # draw a shaded-relief image
    m.shadedrelief(scale=scale)
    # lats and longs are returned as a dictionary
    lats = m.drawparallels(np.linspace(-90, 90, 13))
    lons = m.drawmeridians(np.linspace(-180, 180, 13))
    # keys contain the plt.Line2D instances
    lat_lines = chain(*(tup[1][0] for tup in lats.items()))
    lon_lines = chain(*(tup[1][0] for tup in lons.items()))
    all_lines = chain(lat_lines, lon_lines)
    # cycle through these lines and set the desired style
    for line in all_lines:
        line.set(linestyle='-', alpha=0.3, color='w')
from mpl_toolkits.basemap import Basemap
import random
# Jitter positions by up to +-3 degrees so same-country points spread out.
data_s3.iloc[:,-3]= [x+random.uniform(-3,3) for x in data_s3.iloc[:,-3]]
data_s3.iloc[:,-4] = [x+random.uniform(-3,3) for x in data_s3.iloc[:,-4]]
data_s3.columns
l3 = []
color_map3 = []
# Colour each point by sentiment (d[10], presumably polarity -- confirm).
for d in data_s3.values:
    if (d[10] < -0.1):
        color_map3.append("#ff4c4c")
    elif (d[10] > 0.1):
        # NOTE(review): l3 collects d[5] only for positive rows and is never
        # used afterwards -- probably leftover.
        l3.append(d[5])
        color_map3.append("#00cc00")
    else:
        color_map3.append("#ffffff")
fig = plt.figure(figsize=(25,25))
m = Basemap(projection='cyl')
draw_map(m)
# scatter(x, y): column -3 (lng) on x, column -4 (lat) on y.
m.scatter(data_s3.iloc[:,-3],data_s3.iloc[:,-4],color=color_map3)
plt.savefig("worldmap_f2.pdf")